#!/usr/bin/python3
# -*- coding: utf-8 -*-
import time
import random

import httpx
from bs4 import BeautifulSoup

from hupu_nba_detail import HupuDetailSpider


# Shared detail-page spider instance, reused by the module-level fetch helpers below.
detail_spider = HupuDetailSpider()


def get_detail(url, title):
    """Fetch one news detail page via the shared detail spider, then pause.

    The random pause between requests keeps the crawl polite.
    """
    print('新闻详情 title', title)
    detail_spider.get_detail(url)
    delay_seconds = random.randint(3, 5)
    time.sleep(delay_seconds)


def get_shh_detail(url, title):
    """Fetch one 湿乎乎 (BBS) post via the shared detail spider, then pause.

    The random pause between requests keeps the crawl polite.
    """
    print('湿乎乎详情 title', title)
    detail_spider.get_shh_detail(url)
    delay_seconds = random.randint(3, 5)
    time.sleep(delay_seconds)


def get_shh_list(content_list):
    """Fetch every post linked from a 湿乎乎 section.

    ``content_list`` is expected to be an iterable of BeautifulSoup ``<a>``
    tags; entries without an inner ``<div>`` are skipped.
    Fix: the original called ``shh_item.find('div')`` twice per item —
    hoisted into a local and converted to a guard clause.
    """
    for shh_item in content_list:
        inner_div = shh_item.find('div')
        if inner_div is None:
            # Not a post entry (e.g. a bare link) — skip it.
            continue
        shh_item_href = shh_item.get('href')
        shh_item_title = inner_div.find(class_='text').text
        print('shh_item_link', shh_item_href)
        get_shh_detail(shh_item_href, shh_item_title)


def get_new_list(item_list):
    """Fetch every news article linked from the list-news section.

    ``item_list`` is expected to be an iterable of BeautifulSoup tags, each
    containing an ``<a>`` with the article title and href.
    Fix: the original called ``item.find('a')`` twice per item — hoisted
    into a local.
    """
    for item in item_list:
        anchor = item.find('a')
        title = anchor.text
        detail_link = anchor.get('href')
        print('link', detail_link)
        get_detail(detail_link, title)


class HupuSpider:
    """Crawls the Hupu NBA front page: the news list plus the three 湿乎乎 panels."""

    def __init__(self):
        self.url = "http://nba.hupu.com/"
        # Browser-like request headers so the site serves the normal page.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
        }

    def get_list_url(self):
        """Fetch the front page and dispatch each section to its list handler."""
        content = self.get_content(self.url)

        html = str(content, 'utf-8')
        soup = BeautifulSoup(html, 'lxml')

        print('首页')
        # The three "shh" (湿乎乎) panels, in page order: focus, topic, fresh.
        list_shh_item = soup.find_all('div', class_='shh-item')

        print('新闻')
        list_news = soup.find(class_='list list-news')
        item_list = list_news.findAll(class_='list-item')
        get_new_list(item_list)

        print('焦点')
        item_shh_item_focus = list_shh_item[0].find_all('a')
        get_shh_list(item_shh_item_focus)

        print('话题')
        item_shh_item_topic = list_shh_item[1].find_all('a')
        get_shh_list(item_shh_item_topic)

        print('新鲜')
        item_shh_item_fresh = list_shh_item[2].find_all('a')
        get_shh_list(item_shh_item_fresh)

    def get_content(self, url):
        """GET *url* and return the raw response body as bytes.

        Fix: the original created an httpx.Client and never closed it,
        leaking the connection pool on every call; the context manager
        guarantees the client is closed.
        """
        # NOTE(review): verify=False disables TLS certificate validation —
        # tolerated for this scraping target only; do not copy elsewhere.
        with httpx.Client(http2=True, verify=False) as client:
            response = client.get(url=url, headers=self.headers)
            return response.content

    def run(self):
        """Entry point: crawl the front page once."""
        self.get_list_url()


if __name__ == '__main__':
    # Script entry point: crawl the Hupu NBA front page once.
    HupuSpider().run()
